import requests
import parsel
import re
import json
import 名菜美团店铺推荐 as recommend

class FoodDetail:
    """Scrape the Baidu Baike summary of a dish and append it to a data file.

    Typical use is ``FoodDetail(name).run()``, which fetches the page,
    extracts the summary text and writes one record to disk.

    Attributes:
        name: Dish name used to build the Baike URL.
        url: Baike page URL for ``name``.
        headers: HTTP headers sent with the request.
        cookies: Browser cookies captured from a session (see note in
            ``__init__``; they are not attached to the request).
        html: Raw page HTML, filled in by ``get_data``.
        introduce: Plain-text summary, filled in by ``parse_data``.
    """

    # Seconds to wait for the server before giving up on the request.
    REQUEST_TIMEOUT = 10

    def __init__(self, name):
        """Prepare the request parameters for the dish called ``name``."""
        self.name = name
        self.url = f'https://baike.baidu.com/item/{self.name}'
        self.headers = {
            'Referer': 'https://baike.baidu.com/',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0',
        }
        # NOTE(review): these cookies are stored on the instance but are not
        # passed to ``requests.get`` in ``get_data``. They also look like
        # account session tokens (BDUSS); consider loading them from
        # configuration instead of committing them to source.
        self.cookies = {
            'BIDUPSID': '0AFEF92544B0252A6C9F92FBD5655A75',
            'PSTM': '1622883408',
            'BAIDUID': '947192445B5C1BDB32C3084B3073E7B3:SL=0:NR=10:FG=1',
            'BDUSS': '2ZSN3ZIRVRiMzJqQ1kxOW9PSnM2Um9WV0xtVlFQbWZkRENOMGJKVTdwQ0FCcE5sSVFBQUFBJCQAAAAAAAAAAAEAAADA9adwc29uYVRuVAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAIB5a2WAeWtlM',
            'BDUSS_BFESS': '2ZSN3ZIRVRiMzJqQ1kxOW9PSnM2Um9WV0xtVlFQbWZkRENOMGJKVTdwQ0FCcE5sSVFBQUFBJCQAAAAAAAAAAAEAAADA9adwc29uYVRuVAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAIB5a2WAeWtlM',
            'Hm_lvt_55b574651fcae74b0a9f1cf9c8d7c93a': '1705068458',
            'baikeVisitId': '00392192-3816-4fd9-b7c4-b7afcc80b384',
            'H_WISE_SIDS_BFESS': '40042_39938_40201_39661',
            'H_PS_PSSID': '40201_39661_40206_40211_40216_40224',
            'H_WISE_SIDS': '40201_39661_40206_40211_40216_40224',
            'MCITY': '-%3A',
            'ZFY': 'FqI81JbRldwEa5fTz16UA72vEm0yBZMmTd62PmpHeXc:C',
            'BAIDUID_BFESS': '947192445B5C1BDB32C3084B3073E7B3:SL=0:NR=10:FG=1',
            'BDORZ': 'FFFB88E999055A3F8A630C64834BD6D0',
            'BDRCVFR[C0sZzZJZb70]': 'mk3SLVN4HKm',
            'PSINO': '3',
            'BA_HECTOR': '24848h0h042ka5a0ah8g018gchjquc1istaaq1t',
            'channel': 'baidusearch',
            'zhishiTopicRequestTime': '1708042589243',
            'BK_SEARCHLOG': '%7B%22key%22%3A%5B%22%E6%89%A3%E8%82%89%22%2C%22%E9%9D%A2%E5%AD%90%E6%89%A3%E8%82%89%22%5D%7D',
            'ab_sr': '1.0.1_YjQ3ZGUyMzZmYTZhZjgwZTI3NDNhOTA0ZDljOGY5OWZiMTcxMjc1OGEzYjE0NjRiOThhZDM2YWE0YTI1YjRlMTMzMGQxODcwMzRjZmQ2ODNhMzVmYWEyYjE1MzEwM2RhNjhmOGZhYjM4MDZjNjc1NzE3NTkxOWU1ZTE5NzdhYTUwOTExYTYyOTI5ZWJlZmQzMjFhNGJjNGVhMjNmY2QwYmM5MzAxY2NkNDRlZjIzZjQ4ZTkzZjcyMWE4YzFjZTBl',
        }
        self.html = ""
        self.introduce = ""

    @staticmethod
    def _strip_tags(fragment):
        """Return ``fragment`` with every ``<...>`` tag removed.

        All text between tags is kept, in document order. Character
        entities are left exactly as they appear in ``fragment``.
        """
        # ``[^>]*`` also spans newlines, so multi-line tags are removed too.
        return re.sub(r'<[^>]*>', '', fragment)

    def get_data(self):
        """Download the Baike page and store its HTML in ``self.html``.

        The fetch is best-effort: on a network error, timeout or non-2xx
        status the problem is reported on stdout and ``self.html`` is set
        to ``""`` so that ``parse_data`` yields an empty introduction.
        """
        try:
            resp = requests.get(
                self.url,
                headers=self.headers,
                timeout=self.REQUEST_TIMEOUT,
            )
            resp.raise_for_status()
        except requests.RequestException as exc:
            # Only request-level failures are tolerated; programming errors
            # still propagate instead of being silently swallowed.
            print(f"Failed to fetch {self.url}: {exc}")
            self.html = ""
        else:
            self.html = resp.text

    def parse_data(self):
        """Extract the summary text from ``self.html`` into ``self.introduce``.

        ``self.introduce`` becomes ``""`` when there is no HTML or the
        summary element is not present in the page.
        """
        if not self.html:
            self.introduce = ""
            return

        selector = parsel.Selector(text=self.html)
        summary_tag = selector.css("div.lemmaSummary_nklAo.J-summary").get()
        self.introduce = self._strip_tags(summary_tag) if summary_tag else ""

    def save_data(self):
        """Append the dish record to ``../data/data1.json`` and print it.

        Each call appends one pretty-printed JSON object followed by
        ``",\\n"``. The file is therefore a comma-separated sequence of
        objects rather than a single valid JSON document.
        """
        data_dict = {
            "name": self.name,            # dish name
            "introduce": self.introduce,  # dish introduction
            "feature": "",
            # presumably a JSON-serializable list of shops - confirm against
            # the recommend module
            "recommended_shop": recommend.main(self.name)
        }

        with open('../data/data1.json', 'a+', encoding='utf-8') as f:
            json.dump(data_dict, f, ensure_ascii=False, indent=4)
            f.write(',\n')

        print(data_dict)

    def run(self):
        """Run the full pipeline: fetch, parse, then save."""
        # Fetch the page
        self.get_data()
        # Parse the summary
        self.parse_data()
        # Write the record
        self.save_data()


def main(name="辣椒炒肉"):
    """Scrape and save the details of the dish called ``name``."""
    FoodDetail(name=name).run()


if __name__ == '__main__':
    # Script entry point: reuse main() instead of duplicating its body.
    main("外婆菜")
